import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
import re
# %pip install nfft
from nfft import nfft
from imageio import imread, imsave
from itertools import product, permutations
from scipy.linalg import expm
from scipy.optimize import minimize
A few matrix constructors that are handy for homework.
# Yields the n by n Hilbert matrix
def Hilbert(n):
    """Return the n-by-n Hilbert matrix, H[i, j] = 1 / (i + j + 1)."""
    # Broadcasting an index column against an index row builds the whole
    # matrix in one vectorized step instead of a Python double loop.
    idx = np.arange(n)
    return 1.0 / (idx[:, None] + idx[None, :] + 1)
# Build a 5-by-5 Hilbert matrix and show its famously huge condition number.
g = Hilbert(5)
print(g)
print(np.linalg.cond(g))
# Checkerboard of 1 and -1
def checkerboard(n):
    """Return an n-by-n matrix whose (i, j) entry is (-1)**(i + j)."""
    # Broadcasting replaces the original elementwise Python loop;
    # (-1.0)**(i+j) alternates sign in a checkerboard pattern.
    idx = np.arange(n)
    return (-1.0) ** (idx[:, None] + idx[None, :])
# A +/-1 checkerboard, and a 0/1 checkerboard derived from it.
print(checkerboard(4))
print((1 - checkerboard(4)) / 2)
# A tridiagonal matrix with the given pattern
def tridiag(n, pattern=(-1, 2, -1)):
    """Return the n-by-n matrix with pattern = (sub, main, super)
    placed on its three central diagonals."""
    sub, main, sup = pattern
    below = sub * np.eye(n, k=-1)
    middle = main * np.eye(n)
    above = sup * np.eye(n, k=1)
    return below + middle + above
print(tridiag(6,pattern=(-1, 4, 2)))
# Yields all square matrices of a given n whose entries are 0 or 1
def Hadamards(n):
    """Return an array of all 2**(n**2) n-by-n matrices with 0/1 entries."""
    # Every matrix corresponds to one n*n-long tuple of bits.
    combos = list(product((0, 1), repeat=n * n))
    return np.array(combos).reshape(-1, n, n)
print(Hadamards(3)[[23,87,120]])
# Yields all permutation matrices of a given n
def perm_mats(n):
    """Return an array of all n! permutation matrices of size n-by-n."""
    perms = list(permutations(range(n)))
    out = np.zeros((len(perms), n, n))
    rows = np.arange(n)
    for j, perm in enumerate(perms):
        # One fancy-indexed assignment places the single 1 in every row,
        # replacing the inner Python loop over rows.
        out[j, rows, perm] = 1
    return out
print(perm_mats(4)[[0,8,22]])
Only one entry so far. cofactor(A) yields the cofactor matrix of $A$ as defined in Strang, Linear Algebra and Its Applications 4e, 4.3:
$C_{ij} = (-1)^{i+j}\det M_{ij}$
Where $M_{ij}$ is the $(n-1)\times(n-1)$ submatrix that results from eliminating the $i$th row and $j$th column of $A$.
Then taking the dot product of any single row of $A$ with the corresponding row of $C$ yields $\det{A}$ (since $A C^T = (\det A)\, I$).
def cofactor(A):
    """Return the cofactor matrix C of A, where
    C[i, j] = (-1)**(i + j) * det(M_ij)
    and M_ij is A with row i and column j removed."""
    n = len(A)
    out = np.zeros((n, n))
    for i in range(n):
        # np.delete returns a copy, so deleting the row once per i is
        # safe and avoids redoing it inside the inner loop.
        rows_removed = np.delete(A, i, axis=0)
        for j in range(n):
            minor = np.delete(rows_removed, j, axis=1)
            out[i, j] = np.linalg.det(minor) * (-1) ** (i + j)
    return out
# Demo: cofactors of a 4-by-4 tridiagonal matrix.
g = tridiag(4,pattern=(-1,4,3))
print(g)
print(cofactor(g))
# Dot a row of A with the SAME row of the cofactor matrix to get the
# determinant (the adjugate identity A @ C.T = det(A) * I).
print(g[2] @ cofactor(g)[2])
# Numpy's evaluation of the same
print(np.linalg.det(g))
Reference: Gilbert Strang, Linear Algebra and Its Applications 4e, sections 5.3 and 5.4.
arr or $A$ gives a random discrete Markov matrix indicating the relationship of next year’s population to this one. It satisfies the defining property of a discrete Markov matrix:
$\sum_{i=1}^n A_{ij} = 1$ for all $j$
That is, the sum of each column is one.
arr_cont or $A'$ gives the continuously-compounded form of the same, which satisfies a similar equation:
$\sum_{i=1}^n A'_{ij} = 0$ for all $j$
arr_cont equals simply $A - I$. This is easy to see by analogy with $(1+r)^{t}$ and $e^{rt}$, respectively the discrete and continuous forms of the one-dimensional exponential growth equation. In matrix form, the equations become $u_{k+1} = A u_k$ (discrete) and $\frac{du}{dt} = A' u$ (continuous).
Both matrices have the same eigenvectors (and the eigenvalues are simply reduced by one in arr_cont). The graph below suggests that the continuous form approaches its eigenvectors slightly faster.
n = 6
np.random.seed(1987)
# A random n-by-n Markov matrix: nonnegative entries with each column
# rescaled to sum to one.
arr = np.random.random_sample((n,n))
arr = arr / arr.sum(axis=0)
# and its continuous form (just subtract I)
arr_cont = arr - np.eye(n)
# n random population start values
u0 = np.random.randint(51, size=n)
print(arr)
# Sanity check: every column sums to 1.
print(arr.sum(axis=0))
arr has its largest eigenvalue $\lambda_1 = 1$. Thus, we expect the matrix to converge on the corresponding eigenvector $\eta_1$. For our matrix $\eta_1$ is complex, so there will be oscillatory behavior as $t \to \infty$, but in this case the oscillations are gentle enough that they do not show on the plot.
# Eigenvalues of arr
print(np.linalg.eigvals(arr))
# First eigenvector of arr.
# np.linalg.eig returns eigenvectors as COLUMNS of its second output,
# so the first eigenvector is column 0 -- indexing `[1][0]` would
# wrongly take the first ROW instead.
print(np.linalg.eig(arr)[1][:, 0])
t = 6
# Discrete results from time 0 to t
results_discrete = np.stack([np.linalg.matrix_power(arr, i) @ u0 for i in range(t+1)])
x_discrete = range(t+1)
# Continuous results from time 0 to t.
# Define the time grid once and reuse it (the original repeated the
# np.linspace call), and give the comprehension variable its own name
# instead of confusingly reusing `t`.
x_cont = np.linspace(0, t, 101)
results_cont = np.stack([expm(s * arr_cont) @ u0 for s in x_cont])
# Plot discrete trajectories (yellow-green) against continuous (sea-green).
plt.figure(figsize=(8,10))
plt.ylabel('population')
plt.xlabel('time')
plt.title('Comparison of discrete and continuous Markov processes')
plt.plot(x_discrete, results_discrete, color='yellowgreen')
plt.plot(x_cont, results_cont, color='seagreen')
# Legend: empty plots give one legend entry per family instead of one
# per population component.
plt.plot([],[], color='yellowgreen', label='discrete')
plt.plot([],[], color='seagreen', label='continuous')
plt.legend()
References:
Public-domain image from the Biodiversity Heritage Library.
Singular value decomposition (SVD) factors a matrix into three components:
$$A = U S V^H$$The middle $S$ is a diagonal matrix containing the square roots of the eigenvalues of $A^H A$ (or $A A^H$). Because $S$ is sorted from greatest $\sigma$ to least, we can zero out the tail end of $S$ as well as the corresponding columns of $U$ and $V$ to produce a lightweight set of matrices whose products approximate $A$.
Images are matrices, so this method is the basis of many widely used image-compression algorithms. Here, I give a practical demonstration of how SVD works and the level of quality one can expect when compressing a typical image.
First, we read in the image file. It’s a JPEG, so technically it’s already been compressed (you can even see some compression artifacts if you squint), but we won’t sweat it.
I cropped the image a bit at this stage to show its detail. The area we are working with is 1200 by 1300 pixels; these will be the eventual dimensions of $S$ in SVD. The array has an additional dimension of 3, corresponding to the image’s red, green, and blue channels.
# Crop to a 1200-by-1300 detail; the trailing : keeps all 3 color channels.
img = imread('linalg-img2.jpg')[500:1700,0:1300,:]
img.shape
plt.figure(figsize=(12,13))
plt.axis('off')
plt.imshow(img)
For now, I just want to work with one channel, so I will average the three together to produce a grayscale version of the image.
# Average the three RGB channels into one grayscale channel.
img_bw = img.mean(axis=2)
plt.figure(figsize=(12,13))
plt.axis('off')
plt.imshow(img_bw,cmap='gray')
Make a function that factors the image array using SVD and discards all but a specified portion of the entries. This function outputs three arrays, left, sigma, and right, that correspond to $U$, $S$, and $V^H$. That is, all you have to do is matrix-multiply them together to recover an approximation for the original array.
def svd_prep(img, ratio=1):
    """SVD-factor `img`, keeping only the leading fraction `ratio` of the
    singular values.

    Returns (left, sigma, right), corresponding to (U, S, V^H); their
    matrix product approximates `img`.
    """
    left, diag, right = np.linalg.svd(img)
    # Number of sigma values to keep (ceil, so any positive ratio keeps
    # at least one)
    keep = int(np.ceil(diag.size * ratio))
    # Discard the unused columns of U and rows of V^H
    left = left[:, :keep]
    right = right[:keep]
    # np.diag builds the (keep, keep) diagonal matrix directly,
    # replacing the original element-by-element fill loop.
    sigma = np.diag(diag[:keep])
    return (left, sigma, right)
Make sure the SVD is working; no compression yet.
# Sanity check: with ratio=1 the product reproduces the image exactly
# (up to floating point). rtol=1e-64 effectively disables the relative
# tolerance, so the comparison is governed by allclose's default atol.
a, b, c = svd_prep(img_bw,1)
np.allclose((a @ b @ c),img_bw,rtol=1e-64)
Here are the compression ratios we’ll try.
# Nine compression ratios: e^-8, e^-7, ..., e^0 = 1 (no compression).
ratios = np.exp(np.arange(9) - 8)
ratios
Compress and reconstruct the images. Now compressed_images contains nine different approximations of the original array. This cell takes several seconds to run on my computer.
# Build one approximation of the grayscale image per compression ratio.
compressed_images = []
for r in ratios:
    a, b, c = svd_prep(img_bw,r)
    # Divide by 255 because we aren't working with ints anymore;
    # matplotlib likes floats between 0 and 1
    b = b / 255
    compressed_images.append(a @ b @ c)
Let’s take a look at the compressed images. Note that substantial detail is preserved at only 13.5% of the filesize.
# 3-by-3 grid of the nine approximations, most compressed first.
plt.figure(figsize=(14,14))
plt.suptitle('Image compression by discarding small singular values in SVD',y=.9,size=16)
plt.subplots_adjust(hspace=0.1,wspace=0.05)
for i in range(9):
    plt.subplot(3,3,i+1)
    plt.imshow(compressed_images[i],cmap='gray')
    plt.title("SVD compression ratio: {}".format(np.round(ratios[i],decimals=4)),y=-.05,va="top")
    plt.axis('off')
Now, let’s see if we can do the same thing while preserving the color. First, we split the image into three arrays, one corresponding to each channel of the image. We perform SVD on these arrays separately, then recombine. We’ll use a 10% compression ratio this time, which yields decent results while still making the compression visible.
# Compress each color channel independently at a 10% ratio.
img_r, img_g, img_b = img[:,:,0], img[:,:,1], img[:,:,2]
compressed_channels=[]
for i in [img_r, img_g, img_b]:
    a, b, c = svd_prep(i,0.1)
    compressed_channels.append(a @ b @ c)
Recombining the channels.
# Stack the three channels back into an RGB image, scaled into [0, 1].
compressed_rgb = np.stack(compressed_channels,axis=2) / 255
compressed_rgb.shape
Let’s compare our compressed image to the original. Take a close look at the background if you can’t see the difference.
Matplotlib throws a warning because the approximate SVD decomposition has produced negative values in its approximation of $A$; clipping these to 0, which matplotlib does by default, is an acceptable remedy.
# Side-by-side comparison: compressed (left) vs. original (right).
plt.figure(figsize=(14,7))
plt.subplots_adjust(hspace=0,wspace=0.05)
plt.suptitle('SVD image compression using three channels',y=.9,size=14)
plt.subplot(1,2,1)
plt.imshow(compressed_rgb)
plt.title("SVD compression ratio: 0.1",y=-.04,va="top")
plt.axis('off')
plt.subplot(1,2,2)
plt.imshow(img)
plt.title("No compression",y=-.04,va="top")
plt.axis('off')
Reference: Gilbert Strang, Linear Algebra and Its Applications 4e, section 7.3.
Let $A_{k+1} = R_k Q_k$ where $Q_k R_k$ is the QR factorization of $A_k$.
This process creates matrices similar to $A$ that get iteratively more upper triangular. The values along the diagonal then approach the eigenvalues of $A$.
Note that I leave the actual work of finding the QR factorizations to numpy.
# Performs k iterations of the QR algorithm
def unshifted_qr(A, k):
    """Return A_k, where A_{j+1} = R_j Q_j and Q_j R_j = qr(A_j).

    Iterative rather than recursive, so a large k cannot hit Python's
    recursion limit. k = 0 returns A unchanged.
    """
    for _ in range(k):
        Q, R = np.linalg.qr(A)
        A = R @ Q
    return A
# A random matrix
np.random.seed(2048)
A = np.random.random_sample((6,6))
iters = 120
demo_iters = 12
demo = np.empty((iters,6,6))
# demo[i] holds the matrix after i QR iterations.
# NOTE(review): unshifted_qr(A, i) recomputes all i steps from scratch
# on each pass, so this loop performs O(iters^2) QR factorizations.
for i in range(iters):
    demo[i] = unshifted_qr(A, i)
with np.printoptions(precision=3, suppress=True):
    print(demo[0:3])
Sum up and plot the entries of the main diagonals and subdiagonals. The sum of the main diagonal should stay constant since the trace equals the sum of the eigenvalues. The others should converge toward zero.
You can experiment with representing the convergence using the vector length, mean, or absolute-value sum of the diagonal entries instead.
# sums[i, k] is the sum of one diagonal of demo[i]; the inner loop runs
# over offsets -5..0, so column 5 is the main diagonal and columns 0..4
# are the subdiagonals s5..s1.
sums = np.array([[np.diag(demo[i],k).sum() for k in -np.arange(6)[::-1] ] for i in range(demo_iters)])
sums_x = range(demo_iters)
plt.figure(figsize=(9,8))
for i in range(6)[::-1]:
    if i==5:
        # Offset 0: the main diagonal (its sum is the invariant trace).
        plt.plot(sums_x, sums[:,5], label='Sum of main diagonal')
    if i!=5:
        plt.plot(sums_x, sums[:,i], label='Sum of s{} subdiagonal'.format(5-i))
plt.legend()
plt.title('Sum of subdiagonal entries as A is made upper triangular by QR')
plt.ylabel('sum')
plt.xlabel('iterations')
print(np.linalg.eigvals(A))
The $s_1$ subdiagonal (orange in the plot above) fails to converge. Instead, it oscillates because A has complex eigenvalues.
The following plot shows the sum of the $s_1$ subdiagonal through 120 iterations of QR.
# The s1 subdiagonal keeps oscillating through all 120 iterations.
plt.figure(figsize=(9,8))
plt.plot([np.diag(i,-1).sum() for i in demo], label='Sum of s1 subdiagonal')
plt.ylabel('sum')
plt.xlabel('iterations')
plt.title('Oscillation of subdiagonal entries after many iterations of QR')
plt.legend()
Reference: Gilbert Strang, Linear Algebra and Its Applications 4e, section 7.4.
Here I compare three different techniques for splitting a matrix $A = S - T$ and solving $Ax = b$ iteratively using the sometimes-convergent difference equation $Sx_{k+1} = Tx_k + b$.
Included is a function that determines from $A$ the optimal $\omega$ to be used in SOR.
def best_omega(A):
    """
    Computes the best omega value to use to split A for SOR.

    Uses the classical formula omega* = 2 (1 - sqrt(1 - mu^2)) / mu^2,
    where mu is the spectral radius of the Jacobi iteration matrix
    D^-1 (-L - U).
    """
    L = np.tril(A, k=-1)
    D = np.diag(np.diag(A))
    U = np.triu(A, k= 1)
    # The spectral radius is the largest eigenvalue MAGNITUDE. Calling
    # .max() directly on the (possibly negative or complex) eigenvalues
    # picks the wrong value; take absolute values first.
    mu2 = np.abs(np.linalg.eigvals(np.linalg.inv(D) @ (-L - U))).max() ** 2
    return 2 * (1 - np.sqrt(1 - mu2)) / mu2
def ST(A, omega=1):
    """
    Splits A into S - T in preparation for iterative solving of Ax = b.
    Defaults to omega=1 (Gauss-Seidel method).
    Setting omega=0 yields Jacobi split where S = diag(a)
    """
    diagonal = np.diag(np.diag(A))
    if omega == 0:
        # Jacobi: S is just the diagonal part of A.
        S = diagonal
        return S, S - A
    strict_lower = np.tril(A, k=-1)
    strict_upper = np.triu(A, k=1)
    S = diagonal / omega + strict_lower
    T = (1 / omega - 1) * diagonal - strict_upper
    return S, T
def ST_iter(S, T, x0, b, n_iterations):
    """
    Compute the nth iterate of the difference equation
    S x_{k+1} = T x_k + b, starting from x0.

    Iterative rather than recursive, so a large n_iterations cannot hit
    Python's recursion limit.
    Will fail to converge for many matrices.
    """
    x = x0
    for _ in range(n_iterations):
        x = np.linalg.solve(S, T @ x + b)
    return x
np.random.seed(1987)
# Convergent example with tridiagonal A, random x0
A = np.eye(5) * 2 - np.eye(5,k=1) - np.eye(5,k=-1)
x0 = np.random.randint(0,100,size=5)
b = np.arange(12,17)
n_iterations = 30
print(A)
print(x0)
# Jacobi split (omega=0)
S, T = ST(A, omega=0)
jacobi = np.vstack([ST_iter(S, T, x0, b, n) for n in range(n_iterations)])
# Gauss-Seidel split (omega=1)
S, T = ST(A, omega=1)
GS = np.vstack([ST_iter(S, T, x0, b, n) for n in range(n_iterations)])
# Successive overrelaxation with the computed optimal omega
S, T = ST(A, omega=best_omega(A))
SOR = np.vstack([ST_iter(S, T, x0, b, n) for n in range(n_iterations)])
# Plot: each method's per-component guesses for x over 30 iterations.
plt.figure(figsize=(10,8))
plt.ylabel('guess for x')
plt.xlabel('iterations')
plt.plot(range(n_iterations), jacobi, color='lightskyblue')
plt.plot(range(n_iterations), GS, color='dodgerblue')
plt.plot(range(n_iterations), SOR, color='navy')
# Legend: empty plots give one entry per method rather than one per
# solution component.
plt.plot([],[], color='lightskyblue', label='Jacobi')
plt.plot([],[], color='dodgerblue', label='Gauss-Seidel')
plt.plot([],[], color='navy', label='Successive overrelaxation')
plt.legend()
plt.title('Comparison of iterative methods for solving Ax = b')
My best guess vs. numpy’s “real” answer.
# Compare the final SOR iterate with numpy's direct solution.
print(SOR[-1,:])
print(np.linalg.solve(A, b))
with np.printoptions(precision=5):
    print(SOR[-1,:] - np.linalg.solve(A, b))
Reference: Gilbert Strang, Linear Algebra and Its Applications 4e, section 8.1.
I’ll demonstrate how to use Scipy’s optimize module to solve a linear programming problem. This is more of a coding exercise than a linear algebra exercise, but there were some hitches in setting it up that are worth clarifying.
The problem we’re looking at is problem #9 from the text cited above:

The author just wants us to set up the problem, but I will provide a full solution.
First, we must decide on our “data structure.” scipy.optimize wants our variable to be one dimensional, but this problem will make more sense if we can preserve the meshing of origin and destination implied in the problem. I organize the distances like this:
where the vertical axis gives the destination (Chicago, New England) and the horizontal axis gives the source (TX, CA, AK).
Then, if we store the number of barrels to be shipped in a 2-by-3 matrix $X$, the cost function is simply the sum of the entries in the Hadamard product $X \circ D$. Let’s put that in Numpy.
# Cost matrix (D above)
distances = np.array([[1000, 2000, 3000],
                      [1500, 3000, 3700]])
# Cost function (what we will be minimizing)
def cost(x):
    """Total shipping cost: the sum of the Hadamard product X * D."""
    # scipy.minimize passes the variable as a flat vector; restore the
    # (2, 3) source-by-destination shape.
    barrels = x.reshape(2, 3)
    return np.sum(distances * barrels)
Now, we can start implementing our constraints. First, we have that each state produces exactly a million barrels of oil. In linear algebra terms, this means that summing each column of the shipment matrix $X$ (one column per source) should yield 1,000,000, repeated thrice. That is,
$$\mathbf{1}^T X = \begin{bmatrix} 1,000,000 & 1,000,000 & 1,000,000 \end{bmatrix}$$scipy.optimize likes the constraints to be set equal to zero, so we subtract the millions matrix from the sums matrix to define production_constraint(x).
def production_constraint(x):
    """Each source ships exactly 1,000,000 barrels in total (an equality
    constraint: returns a zero vector when satisfied)."""
    per_source_totals = x.reshape(2, 3).sum(axis=0)
    return per_source_totals - np.ones(3) * 1e6
Similarly, the text says that Chicago needs 800,000 barrels of oil and New England needs 2,200,000. In similar fashion, we get our second constraint from comparing those numbers to the row sums of the shipment matrix $X$ (one row per destination):
$$X \mathbf{1} \geq \begin{bmatrix} 800,000 \\ 2,200,000 \end{bmatrix}$$Why did I change the equality sign to an “at least”? I know from inspection that the optimal solution will never have us deliver more than the minimum amount of oil to each destination—this is a linear problem, after all. More importantly (and this is where I got stuck for a while), scipy.optimize throws an error if you give it only equality constraints.
def demand_constraint(x):
    """Chicago needs at least 800,000 barrels and New England at least
    2,200,000 (an inequality constraint: nonnegative when satisfied)."""
    per_destination_totals = x.reshape(2, 3).sum(axis=1)
    return per_destination_totals - np.array([8e5, 2.2e6])
The third constraint is that all the entries of $X$ must be nonnegative. In Scipy, simple constraints like this are implemented as “bounds.”
bounds = [[0,None]]*6
Next, we bind the constraints into a list of dictionaries that scipy.optimize can understand. This is where I input those equality and inequality signs.
# Production must hold exactly; demand is an inequality (fun(x) >= 0).
constraints = [{'type': 'eq', 'fun': production_constraint},
               {'type': 'ineq', 'fun': demand_constraint}]
The minimize function from scipy.optimize works iteratively, so we need to provide it with a guess for $X$, x0. Let’s start by just sending the same amount of oil from each place. I chose numbers that, although not optimal, satisfy both constraints.
# Initial guess: split each destination's demand evenly across the
# three sources.
x0 = np.array([[8e5 / 3]*3,
               [2.2e6 / 3]*3])
# Should both be zero
print(production_constraint(x0))
print(demand_constraint(x0))
Now we let Scipy do its thing.
# Capture the optimizer output in a named variable instead of relying
# on IPython's `_` (last displayed value), which breaks if the cell is
# re-run or the code runs outside a notebook.
result = minimize(cost, x0=x0, constraints=constraints, bounds=bounds)
print(result)
answer = result.x.reshape(2, 3)
answer_cost = result.fun
print(np.round(answer))
print(np.round(answer_cost))
There’s our answer. In plain English, we will achieve the lowest cost by having Chicago get all 800,000 of its oil barrels from nearbyish California. All the other oil goes to needy New England. Note that as expected, the optimal solution does achieve equality in the demand constraint. The cost itself is $7.4 billion.
def io_helper(s):
    """Parse a string of comma/whitespace-separated numbers into a pair
    of arrays (x, y), taking the values in alternating order."""
    tokens = [tok for tok in re.split(r'[,\s]', s) if tok]
    values = np.array([float(tok) for tok in tokens])
    pairs = values.reshape(-1, 2)
    return pairs[:, 0], pairs[:, 1]
Given $n$ points $x, y$, fit a model in the form $y = a x^b$ and compute the model fit $r^2$.
Taking the natural log on both sides, we have
$$\begin{align} y & = ax^b \\ \log y & = \log a + b \log x \\ \begin{bmatrix} \log y_1 \\ \log y_2 \\ \vdots \\ \log y_n \\ \end{bmatrix} & = \begin{bmatrix} 1 & \log x_1 \\ 1 & \log x_2 \\ \vdots & \vdots \\ 1 & \log x_n \\ \end{bmatrix} \begin{bmatrix} \log a \\ b \end{bmatrix} \end{align}$$Thus, working with the transformed data, we have a linear matrix equation that can be solved by least squares for $A = \log a$ and $b$.
# Import some test points
tmp = "3, 17 6, 17.6 4.35, 15.25 9.2, 13.65 7.65, 10.5 12.75, 9.35 12, 7.7 19.2, 5.1 16.05, 6.1 23.9, 4.1 37.6, 3.2 16.25, 14.2 31.2, 7.1 31.7, 2.9"
x, y = io_helper(tmp)
# Log each side
lx, ly = np.log(x), np.log(y)
# X matrix with ones in first column
X = np.stack([np.ones_like(lx), lx]).T
# Fit model: least squares for [log a, b]
A, b = np.linalg.lstsq(X, ly, rcond=None)[0]
a = np.exp(A)
# Compute model fit: r^2 is measured against the original (unlogged) y
y_pred = np.exp( X @ [A, b])
r2 = 1 - ( sum((y - y_pred)**2)
           / sum((y - y.mean())**2) )
# Numbers we care about
a, b, r2
# Plot the fitted power curve over the scatter of data points.
x_plot = np.linspace(x.min(), x.max(), 51)
y_plot = a*x_plot**b
fig, ax = plt.subplots(figsize=(10,8))
ax.scatter(x,y, color="black")
ax.plot(x_plot, y_plot)
# Annotate with the fitted equation, centered horizontally and hung
# from the second-highest y tick.
ax.text( (plt.xlim()[1] - plt.xlim()[0]) /2,
         ax.get_yticks()[-2],
         "$y = {}x^{{{}}}$\n$r^2$ = {}".format(np.round(a, 4), np.round(b, 4), np.round(r2, 4)),
         size=15,
         horizontalalignment='center',
         verticalalignment='top')
plt.title("Power model", size=18)
Given $n$ points $x, y$, fit a model in the form $y = ce^{kx}$ and compute the model fit $r^2$.
Taking the natural log on both sides, we have
$$\begin{align} y & = ce^{kx} \\ \log y & = \log c + kx \\ \begin{bmatrix} \log y_1 \\ \log y_2 \\ \vdots \\ \log y_n \\ \end{bmatrix} & = \begin{bmatrix} 1 & x_1 \\ 1 & x_2 \\ \vdots & \vdots \\ 1 & x_n \\ \end{bmatrix} \begin{bmatrix} \log c \\ k \end{bmatrix} \end{align}$$Thus, working with the transformed data, we have a linear matrix equation that can be solved by least squares for $C = \log c$ and $k$.
The equation could also be written as $y = e^{kx+C}$.
# Import some test points
tmp = "2.5, 2.65 8.35, 1.85 6.5, 3.65 14.65, 3.85 13.55, 6.9 20.7, 6.75 16.7, 8.35 24, 9.1 22.95, 12.5 26.65, 13.35 32.3, 16.1 31.25, 17.6 25.3, 18.3 12.05, 3.05"
x, y = io_helper(tmp)
# Log y side (only y is logged for the exponential model)
ly = np.log(y)
# X matrix with ones in first column
X = np.stack([np.ones_like(x), x]).T
# Fit model: least squares for [log c, k]
C, k = np.linalg.lstsq(X, ly, rcond=None)[0]
c = np.exp(C)
# Compute model fit: r^2 is measured against the original (unlogged) y
y_pred = np.exp( X @ [C, k] )
r2 = 1 - ( sum((y - y_pred)**2)
           / sum((y - y.mean())**2) )
# Numbers we care about
c, k, r2
# Plot the fitted exponential curve over the scatter of data points.
x_plot = np.linspace(x.min(), x.max(), 51)
y_plot = c * np.exp(k * x_plot)
fig, ax = plt.subplots(figsize=(10,8))
ax.scatter(x,y, color="black")
ax.plot(x_plot, y_plot)
# Annotate with the fitted equation, centered horizontally.
ax.text( (plt.xlim()[1] - plt.xlim()[0]) /2,
         ax.get_yticks()[-2],
         "$y = {}e^{{{}x}}$\n$r^2 = {}$".format(np.round(c, 4), np.round(k, 4), np.round(r2, 4)),
         size=15,
         horizontalalignment='center',
         verticalalignment='top')
plt.title("Exponential model", size=18)
Given $n$ points $x, y$, fit a model in the form $y = \alpha \sin (tx) + \beta \cos(tx) + \delta$ and compute the model fit $r^2$. This form describes a wave of constant amplitude; its frequency is $\frac{t}{2\pi}$, its amplitude and phase offset are determined by $\alpha$ and $\beta$, and $\delta$ gives the vertical offset.
For now, assume $t$ is known, and hopefully it is obvious that $\delta = \bar y$. Then we can fit the model by solving
$$\begin{align} y - \bar y & = \alpha \sin (tx) + \beta \cos(tx) \\ \begin{bmatrix} y_1 - \bar y \\ y_2 - \bar y\\ \vdots \\ y_n - \bar y\\ \end{bmatrix} & = \begin{bmatrix} \sin t x_1 & \cos t x_1 \\ \sin t x_2 & \cos t x_2 \\ \vdots & \vdots \\ \sin t x_n & \cos t x_n \\ \end{bmatrix} \begin{bmatrix} \alpha \\ \beta \end{bmatrix} \end{align}$$Thus, working with the transformed data, we have a linear matrix equation that can be solved by least squares for $\alpha$ and $\beta$.
Now, how do you find $t$? We need to estimate the frequency $f = \frac{t}{2\pi}$ of the given data. Clearly, the wave completes about two periods in the interval $(0,40)$, suggesting $t = 2 \pi \frac {2}{40}$, so we can try values of $t$ in the interval $(\frac {4\pi}{42}, \frac {4\pi}{38})$ to see which has the lowest $r^2$.
(In the implementation, what I actually do is try values of $t = \frac {4\pi}{s}$ for $s \in [38, 42]$.)
tmp = "0.65, 8.5 1.85, 11.35 10.9, 8.75 21, 8.55 31.35, 8.65 41.95, 8.55 39.5, 11.35 37.6, 16.3 34.3, 16.05 34, 12.65 32.35, 10.15 31.2, 6.1 29.9, 2.65 26.3, 0.2 22.95, 2.15 23, 3.7 22.35, 5.35 20.85, 9.9 19.7, 14.3 15.9, 14.3 12.1, 12 11, 7.25 7.8, 3.25 4.75, 2.85 2.05, 5.65 0.7, 9.55 3.3, 4.3 8, 5.05 10.45, 6.75 12.3, 14.15 15.45, 15.45 6, 1.85 17.2, 16.05 37.65, 14.8"
x, y = io_helper(tmp)
# Sort the points by x (needed later for interpolation / FFT)
y = y[np.argsort(x)]
x = x[np.argsort(x)]
# The vertical offset delta is just the mean of y.
d = y.mean()
test_t, test_r2 = [],[]
# Try out different t values near the eyeballed estimate 4*pi/40.
for i in np.linspace(38, 42, 101):
    t = 4 * np.pi /i
    # Design matrix of sine and cosine terms (no ones column here: the
    # vertical offset d is handled by subtracting it from y).
    X = np.stack([np.sin(t*x), np.cos(t*x)]).T
    # Fit model
    a, b = np.linalg.lstsq(X, y - d, rcond=None)[0]
    # Compute model fit
    y_pred = X @ [a, b] + d
    r2 = 1 - ( sum((y - y_pred)**2)
               / sum((y - y.mean())**2) )
    test_t.append(t)
    test_r2.append(r2)
# Plot t, r2 values: the peak marks the best-fitting frequency.
fig, ax = plt.subplots(figsize=(12,7))
ax.plot(test_t, test_r2)
ax.set_ylabel('$r^2$', size=16)
ax.set_xlabel('$t$', size=16)
plt.title("Estimating $t$ (bootstrap method)", size=18)
For a more mathematically “pure” approach to determining $t$, we can create a spectrogram of the input data (signal) and then see which frequency is most prominent. I haven’t figured out how to create a spectrogram for nonuniformly spaced data, as we have here, but there are two approaches we might try.
The first approach would be to interpolate the sample data so that it mimics a uniform sample.
def interpolator(x, y, step):
    """Resample the sorted points (x, y) onto a uniform grid of spacing
    `step` by linear interpolation.

    Returns a (2, m) array: row 0 is the uniform x grid, row 1 the
    interpolated y values. Fixes the original implementation, which
    divided by the number of steps instead of the segment length (so
    every segment's last sample undershot y[i+1]) and restarted the
    grid at each sample point (so the output was not truly uniform).
    """
    x_uniform = np.arange(x[0], x[-1], step)
    # np.interp performs exact piecewise-linear interpolation between
    # the sample points (x must be increasing, as sorted above).
    y_uniform = np.interp(x_uniform, x, y)
    return np.vstack([x_uniform, y_uniform])
# Resample onto a uniform grid, then use matplotlib's power spectral
# density estimate on the interpolated signal.
step = 0.05
x_int, y_int = interpolator(x, y, step)
fig, ax = plt.subplots(figsize=(12,7))
ax.psd(y_int)
ax.set_xlabel('frequency', size=16)
ax.set_ylabel('density', size=16)
plt.title("Estimating $t= 2\pi f$\n(power spectral density method, interpolated data)", size=18)
plt.xlim(0,1)
There is clearly a pattern here, but I don’t know how to interpret this graph yet.
A further idea is to take the nonuniform discrete Fourier transform of the data. There is a Python implementation of the NFFT available via pip install nfft. But I’m not certain how you turn the transformed data into a spectrogram with normal-looking frequencies on the x-axis.
# Nonuniform FFT of the raw, unevenly spaced samples.
# NOTE(review): the nfft package expects sample nodes in [-1/2, 1/2);
# x here is in raw data units -- verify whether rescaling is needed.
Fy = nfft(x, y)
fig, ax = plt.subplots(figsize=(12,7))
# This is wrong, because plt.psd() assumes uniformly spaced frequencies
ax.psd(Fy)
ax.set_xlabel('frequency', size=16)
ax.set_ylabel('density', size=16)
plt.title("Estimating $t= 2\pi f$\n(NFFT method, bad implementation)", size=18)
plt.xlim(0,1)
So, let’s just use the estimate for $t$ we got from our bootstrap method instead.
# Best t from bootstrap method
t = test_t[np.argmax(test_r2)]
# Design matrix of sine and cosine terms (the offset d is handled by
# subtracting it from y, so no ones column is needed)
X = np.stack([np.sin(t*x), np.cos(t*x)]).T
# Fit model
a, b = np.linalg.lstsq(X, y - d, rcond=None)[0]
# Compute model fit
y_pred = X @ [a, b] + d
r2 = 1 - ( sum((y - y_pred)**2)
           / sum((y - y.mean())**2) )
# Numbers we care about
a, b, r2
# Plot the fitted wave over the data.
x_plot = np.linspace(x.min(), x.max(), 201)
y_plot = a*np.sin(t*x_plot) + b*np.cos(t*x_plot) + d
fig, ax = plt.subplots(figsize=(12,7))
ax.scatter(x,y, color="black")
ax.plot(x_plot, y_plot)
plt.ylim(-1,21)
plt.yticks(np.arange(0, 21, 2))
# Annotate with the fitted equation, centered horizontally and hung
# from the top y tick.
ax.text( (plt.xlim()[1] - plt.xlim()[0]) /2,
         ax.get_yticks()[-1],
         "$y = {} \sin({}x) + {} \cos({}x) + {}$\n$r^2 = {}$".format(np.round(a,4),np.round(t,4),np.round(b,4),np.round(t,4),np.round(d,4),np.round(r2,4)),
         size=15,
         horizontalalignment='center',
         verticalalignment='top')
plt.title("Wave model", size=18)
Reference: Robert J. Vanderbei, Linear Programming: Foundations and Extensions, section 8.2.
We often have to solve sparse linear systems $Ax = b$ for a large number of right-hand sides. In such situations, it is useful to have an LU factorization of $A$ on hand. When $A$ is sparse, we can use the minimum-degree heuristic to permute its rows and columns during elimination, producing a factorization
$$A = P_\text{rows} LU P_\text{columns} $$that is as sparse as possible. One advantage of this heuristic is that the systems
$$Ly = b \\ Ux = y$$are easier to solve than those resulting from a more naive LU algorithm. A disadvantage is that the algorithm occasionally breaks for poorly-conditioned matrices (e.g. if $A$ has many entries close to zero).
I implement the minimum-degree heuristic in Numpy and plot its performance for matrices of various sizes and sparsities.
def sparse_LU(A, precision = 1e-12):
    """
    Performs an LU factorization of A, permuted using the minimum-degree heuristic
    to preserve sparsity. Returns
    P_rows, L, U, P_cols
    such that
    A = P_rows @ L @ U @ P_cols
    This algorithm is efficient, but may fail for poorly conditioned A.
    """
    assert A.shape[0] == A.shape[1], "Matrix must be square"
    size = A.shape[0]
    # U starts as a float copy of A and is reduced in place; L collects
    # the elimination columns as elimination proceeds.
    U = np.array(A).astype('float64')
    L = np.zeros_like(U)
    # Track where the original rows/columns end up after pivot swaps.
    row_perm = np.arange(size)
    col_perm = np.arange(size)
    for i in range(size):
        # Minimum-degree heuristic: pivot on the remaining row with the
        # fewest nonzeros in the active submatrix ...
        rowswap = i + np.argmin(np.sum(U[i:,i:] != 0, axis=1))
        # ... then, within that row, pick a column by argmin of the
        # column nonzero counts, with the row's zero entries masked out
        # so a zero can never be chosen as the pivot.
        # NOTE(review): the counts are multiplied by the entry values
        # themselves, which also weights the choice by sign/magnitude --
        # presumably only the zero-masking is intended; verify.
        colswap = i + (np.sum(U[:,i:] != 0, axis=0) * np.ma.masked_equal(U[rowswap,i:], 0)).argmin()
        U[[i,rowswap]] = U[[rowswap,i]]
        L[[i,rowswap]] = L[[rowswap,i]]
        U[:,[i,colswap]] = U[:,[colswap,i]]
        # No need to swap cols of L, since they are still zero
        row_perm[[i, rowswap]] = row_perm[[rowswap, i]]
        col_perm[[i, colswap]] = col_perm[[colswap, i]]
        assert U[i,i] != 0, "Singular matrix"
        assert np.abs(U[i,i]) > precision, \
            "Poorly-conditioned matrix\n Pivot: {}\n Cond: {}".format(U[i,i], np.linalg.cond(A))
        # Store the pivot column in L, then eliminate below the pivot.
        L[i:,i] = U[i:,i]
        U[i+1:] -= np.outer(U[i+1:,i], U[i]/U[i,i])
    # Rescale L so its diagonal is all ones.
    L = L @ np.diag(1/np.diag(L))
    # For a permutation matrix, P.T = P.I
    P_rows, P_cols = np.eye(size)[row_perm].T, np.eye(size)[col_perm]
    return P_rows, L, U, P_cols
np.random.seed(394857)
# 5000 random test matrices with sparsity ~ Uniform(0, 1) and sizes
# drawn from 2..100.
sample_size = 5000
sample_sparsity = np.random.rand(sample_size)
sample_matsize_range = (2, 101)
sample_matsize = np.random.randint(*sample_matsize_range, size=sample_size)
B_sparsity = []
LU_sparsity = []
degenerate_count = 0
%%time
# Random nonsingular, sparse B
for i in range(sample_size):
B = np.random.randn(sample_matsize[i], sample_matsize[i]) \
* (np.random.rand(sample_matsize[i], sample_matsize[i]) < sample_sparsity[i]) \
+ np.random.permutation(np.eye(sample_matsize[i]))
try:
P_rows, L, U, P_cols = sparse_LU(B)
except:
degenerate_count += 1
B_sparsity.append(None)
LU_sparsity.append(None)
else:
B_sparsity.append(np.mean(B==0))
LU_sparsity.append(np.mean([L==0, U==0]))
print("Degenerate matrices: {} / {}".format(degenerate_count, sample_size))
# Scatter of input sparsity vs. factored sparsity, colored by matrix size.
cmap = cm.get_cmap('jet')
fig, ax = plt.subplots(1, 1, figsize = (12,10))
ax.set_title("Minimum-degree heuristic: Preserving sparsity in $A = LU$", size=17)
plt.scatter(B_sparsity, LU_sparsity, c=sample_matsize, cmap=cmap, marker=".", alpha=0.5)
cb = plt.colorbar()
cb.set_label('Size of $A$', size=14)
# Redraw the colorbar fully opaque so it is not dimmed by alpha=0.5.
cb.set_alpha(1); cb.draw_all()
ax.set_ylabel(r'Sparsity of $\left[~L~U~\right]$', size=14)
ax.set_xlabel(r'Sparsity of $A$', size=14)
ax.set_xlim(0,1)
ax.set_ylim(0,1)
This is a very interesting graph. The sparsity of $A$ is preserved in a linear ratio for small matrices, but for large matrices the sparsity isn’t much better than the worst-case sparsity of $0.5$.
Here are a few pathological examples:
# Poorly conditioned: the precision assert inside sparse_LU fires here.
B = np.array([[1e-13, 0,1,],
              [0,1e-26, 0,],
              [1e67,0,-1e-27]])
sparse_LU(B)
# Tiny pivots: every entry is below the default precision threshold.
B = np.random.randn(4, 4)/1e16
sparse_LU(B)
!jupyter nbconvert linalg.ipynb